# DATA PREPARATION

# outcomes: every column of `d` whose name ends in "_PAIR" plus one digit
outcomes <- d[, grep("_PAIR[[:digit:]]{1}$", names(d), value = TRUE)]
## rename "<stem>_PAIR<k>" to "<stem>_t<k>" so the outcome columns carry the
## same "_t<k>" suffix as the feature variables (needed for reshaping)
names(outcomes) <- sub("_PAIR([[:digit:]])$", "_t\\1", names(outcomes))
# drop unneeded indicator columns
outcomes[, paste0("QB2grid_t", seq_len(5L))] <- NULL

# setup new dataset: bind the three column groups side by side
conj <- cbind(
  # demographics: everything up to and including the "disposition" column
  d[, seq_len(which(names(d) == "disposition"))],
  # conjoint features: columns whose name ends in "_t" plus one digit
  d[, grep("_t[[:digit:]]{1}$", names(d), value = TRUE)],
  # outcomes (already renamed to the "_t<k>" suffix)
  outcomes
)
## shorten the column names; the rules are applied in order:
##   "_seen1" -> "A", "_seen2" -> "B", leading "QB" -> "Q", "grid" -> ""
names(conj) <- Reduce(
  function(nms, rule) gsub(rule[["from"]], rule[["to"]], nms),
  list(
    c(from = "_seen1", to = "A"),
    c(from = "_seen2", to = "B"),
    c(from = "^QB", to = "Q"),
    c(from = "grid", to = "")
  ),
  names(conj)
)

# the outcome columns now live in `conj`; remove the temporary object
rm(outcomes)

## Outcome variables (shown for pair 1) and what each one records:
## QB1_PAIR1       # binary choice
## QB2gridA_PAIR1  # rating A
## QB2gridB_PAIR1  # rating B
## QB4_PAIR1       # respects referendum
## QB5_PAIR1       # likelihood of choice

# stack profiles
# Reshape wide -> long so that each row is one respondent (ID) x one displayed
# pair (new column "pair"). Every element of `varying` collects the per-pair
# columns of one measure; `v.names` gives the stacked column names in the
# same order.
conj <- reshape(conj, 
    varying = list(
      # "imm?igration" accepts both the one-m and the two-m spelling of the
      # column prefix: a pattern that matched nothing would make reshape()
      # fail on unequal `varying` lengths.
      names(conj)[grep("^imm?igrationA", names(conj))],
      names(conj)[grep("^imm?igrationB", names(conj))],
      names(conj)[grep("^rightsA", names(conj))],
      names(conj)[grep("^rightsB", names(conj))],
      names(conj)[grep("^tradeA", names(conj))],
      names(conj)[grep("^tradeB", names(conj))],
      names(conj)[grep("^lawsA", names(conj))],
      names(conj)[grep("^lawsB", names(conj))],
      names(conj)[grep("^budgetA", names(conj))],
      names(conj)[grep("^budgetB", names(conj))],
      names(conj)[grep("^paymentA", names(conj))],
      names(conj)[grep("^paymentB", names(conj))],
      names(conj)[grep("^borderA", names(conj))],
      names(conj)[grep("^borderB", names(conj))],
      names(conj)[grep("^timelineA", names(conj))],
      names(conj)[grep("^timelineB", names(conj))],
      names(conj)[grep("^Q1", names(conj))],
      names(conj)[grep("^Q2A", names(conj))],
      names(conj)[grep("^Q2B", names(conj))],
      names(conj)[grep("^Q4", names(conj))],
      names(conj)[grep("^Q5", names(conj))]
    ),
    v.names = c("immigrationA", "immigrationB", 
                "rightsA", "rightsB",
                "tradeA", "tradeB",
                "lawsA", "lawsB",
                "budgetA", "budgetB",
                "paymentA", "paymentB",
                "borderA", "borderB",
                "timelineA", "timelineB",
                "Q1", "Q2A", "Q2B", "Q4", "Q5"),
    timevar = "pair",
    idvar = "ID",
    direction = "long"
)
# hard-coded sanity check on the stacked row count
# NOTE(review): presumably number of respondents x 5 pairs -- confirm
stopifnot(nrow(conj) == 16465)

## create conjoint profile display pair variable ("<ID>_<pair>")
conj[["id_pair"]] <- paste(conj[["ID"]], conj[["pair"]], sep = "_")

# prepare data for stacking: give every pair-level outcome an A and a B version
## Q1 (binary choice): 1 for the profile that was picked, 0 for the other
conj[["Q1A"]] <- with(conj, ifelse(Q1 == 1, 1, 0))
conj[["Q1B"]] <- with(conj, ifelse(Q1 == 2, 1, 0))
## Q4: code 3 counts for both profiles; %in% maps a missing Q4 to 0, not NA
conj[["Q4A"]] <- with(conj, ifelse(Q4 %in% c(1, 3), 1, 0))
conj[["Q4B"]] <- with(conj, ifelse(Q4 %in% c(2, 3), 1, 0))
## Q5: kept only on the profile selected in Q1, NA on the other one
conj[["Q5A"]] <- with(conj, ifelse(Q1 == 1, Q5, NA_real_))
conj[["Q5B"]] <- with(conj, ifelse(Q1 == 2, Q5, NA_real_))
## the pair-level originals are no longer needed
conj[c("Q1", "Q4", "Q5")] <- NULL

# stack a/b options
# Second wide -> long pass: the "<stem>A" / "<stem>B" column pairs are stacked
# into a single "<stem>" column, giving one row per id_pair x profile.
conj <- local({
    stems <- c("immigration", "rights", "trade", "laws",
               "budget", "payment", "border", "timeline",
               "Q1", "Q2", "Q4", "Q5")
    reshape(conj,
        # one c("<stem>A", "<stem>B") vector per stem, in the order of `stems`
        varying = lapply(stems, paste0, c("A", "B")),
        v.names = stems,
        timevar = "AB",
        idvar = "id_pair",
        direction = "long"
    )
})
# stacking two profiles per pair must exactly double the row count
stopifnot(nrow(conj) == 32930) 
# reshape() numbers the stacked profiles 1/2; relabel them as "A"/"B"
conj[["AB"]] <- c("A", "B")[conj[["AB"]]]

# convert feature variables to labeled factors
# Convert a labelled numeric column into a factor named by its value labels.
#
# Arguments:
#   x     name (character scalar) of the column to convert
#   data  data frame holding the column; defaults to the global `conj`, so
#         existing one-argument calls behave as before
#
# The column must carry a named "labels" attribute (label text as names,
# codes as values). Codes without a label are recoded to NA.
#
# Returns the result of recode(..., as.factor = TRUE); stops with an error
# when the column has no named "labels" attribute.
recode_feature <- function(x, data = conj) {
    # exact attribute lookup (no partial matching as with attributes()$labels)
    a <- attr(data[[x]], "labels", exact = TRUE)
    if (is.null(a) || is.null(names(a))) {
        # without labels the recode spec below would be malformed and the
        # failure would surface later and less clearly
        stop("column '", x, "' has no named 'labels' attribute to recode from",
             call. = FALSE)
    }
    # strip the U+0091 / U+0092 quote characters from the label text
    lvls <- gsub("[\\x{0091}\\x{0092}]", "", names(a), perl = TRUE)
    # build a spec of the form: <code>="<label>";...;else=NA
    r <- paste0(paste0(a, '="', lvls, '"', collapse = ";"), ";else=NA")
    #print(r)
    recode(data[[x]], r, as.factor = TRUE)
}
# For every conjoint feature: recode the labelled codes to label text, then
# fix the factor level order as listed below. Labels that do not appear in
# the list become NA.
conj <- local({
    level_order <- list(
        immigration = c(
            "Full control over EU immigration and little to no EU immigration",
            "Full control over EU immigration and lower levels of EU immigration than now",
            "Full control over EU immigration and similar levels of EU immigration to now",
            "Some control over EU immigration and lower levels of EU immigration than now",
            "Some control over EU immigration and similar levels of EU immigration to now",
            "No control over EU immigration and similar levels of EU immigration to now"
        ),
        rights = c(
            "All must leave",
            "Must apply for leave to remain under the same terms as people from non-EU countries",
            "Must apply for leave to remain under less restrictive terms than people from non-EU countries",
            "Can stay if they continue to work while all others must leave",
            "All can stay indefinitely"
        ),
        trade = c(
            "Many administrative barriers to trade in goods and services and 5% average tariff on goods",
            "Many administrative barriers to trade in goods and services and 2.5% average tariff on goods",
            "Many administrative barriers to trade in goods and services and no tariffs on goods",
            "Some administrative barriers to trade in goods and services and 5% average tariff on goods",
            "Some administrative barriers to trade in goods and services and 2.5% average tariff on goods",
            "Some administrative barriers to trade in goods and services and no tariffs on goods",
            "Few administrative barriers to trade in goods and services and 5% average tariff on goods",
            "Few administrative barriers to trade in goods and services and 2.5% average tariff on goods",
            "Few administrative barriers to trade in goods and services and no tariffs on goods"
        ),
        laws = c(
            "Britain is not subject to EU laws or decisions by the European Court of Justice",
            "Britain adopts some EU laws but is not subject to decisions by the European Court of Justice",
            "Britain is subject to some EU laws and some decisions by the European Court of Justice",
            "Britain is subject to all EU laws and all decisions by the European Court of Justice"
        ),
        budget = c(
            "No contribution and no access",
            "£1 billion per year for access",
            "£6 billion per year for access",
            "£12 billion per year for access"
        ),
        payment = c(
            "No payment",
            "£10 billion",
            "£20 billion",
            "£50 billion",
            "£70 billion"
        ),
        border = c(
            "Full passport and customs checks",
            "Passport checks but no customs checks",
            "Customs checks but no passport checks",
            "No passport checks and no customs checks"
        ),
        timeline = c("2019", "2020", "2021", "2023", "2025")
    )

    relabelled <- conj
    for (feature in names(level_order)) {
        as_labels <- recode_feature(feature)
        if (feature == "rights") {
            # "rights" gets one more pass that strips the U+0091/U+0092
            # quote characters from the level text
            levels(as_labels) <- gsub("[\\x{0091}\\x{0092}]", "",
                                      levels(as_labels), perl = TRUE)
        }
        relabelled[[feature]] <- factor(as_labels,
                                        levels = level_order[[feature]])
    }
    relabelled
})

# recode outcomes: Q2 codes 1/2/3/4 become 1/0.67/0.33/0, anything else NA
conj[["Q2"]] <- recode(
  var = conj[["Q2"]],
  recodes = "1=1;2=0.67;3=0.33;4=0;else=NA"
)


# voted leave: pastvote_EURef code 2 -> 1, code 1 -> 0, anything else NA
conj[["votedleave"]] <- recode(
  var = conj[["pastvote_EURef"]],
  recodes = "1=0;2=1;else=NA"
)

# EXPORT DATA FOR USE LATER
#rio::export(conj, "../data/conjoint-data-stacked.dta")
rio::export(x = conj, file = "../data/conjoint-data-stacked.rds")
